File Structure and Shell Commands

# create the lab directory and move into it
mkdir lab05
cd lab05

# create the three subdirectories in a single call, then list them
mkdir data report images
ls

# add an empty README file
touch README.md

# move into data/ and download the players CSV there
cd data
curl -O https://raw.githubusercontent.com/ucb-stat133/stat133-fall-2018/master/data/nba2018-players.csv
ls

# quick inspection: line/word/byte counts, then the first and last lines
wc nba2018-players.csv
head nba2018-players.csv
tail nba2018-players.csv

NBA Players Data

library (ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)

# import the players CSV as a tibble with readr::read_csv();
# the path is resolved relative to the current working directory
dat <- read_csv(file = "../data/nba2018-players.csv")
## Parsed with column specification:
## cols(
##   player = col_character(),
##   team = col_character(),
##   position = col_character(),
##   height = col_integer(),
##   weight = col_integer(),
##   age = col_integer(),
##   experience = col_integer(),
##   college = col_character(),
##   salary = col_double(),
##   games = col_integer(),
##   minutes = col_integer(),
##   points = col_integer(),
##   points3 = col_integer(),
##   points2 = col_integer(),
##   points1 = col_integer()
## )

Filtering, Slicing, and Selecting

# keep rows 1 through 3 of the full table
three_rows <- dat %>% slice(1:3)
three_rows
## # A tibble: 3 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Al Ho… BOS   C            82    245    30          9 Univer… 2.65e7    68
## 2 Amir … BOS   PF           81    240    29         11 <NA>    1.20e7    80
## 3 Avery… BOS   SG           74    180    26          6 Univer… 8.27e6    55
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# keep only the rows that satisfy a condition
# (here: players taller than 85 inches)
gt_85 <- dat %>% filter(height > 85)
gt_85
## # A tibble: 5 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Edy T… CLE   C            87    260    24          1 <NA>    5.14e3     1
## 2 Boban… DET   C            87    290    28          1 <NA>    7.00e6    35
## 3 Krist… NYK   PF           87    240    21          1 <NA>    4.32e6    66
## 4 Roy H… DEN   C            86    270    30          8 George… 5.00e6     6
## 5 Alexi… NOP   C            86    248    28          6 <NA>    4.60e6    39
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# keep just the player and height columns, referenced by name
player_height <- dat %>% select(player, height)

# use slice() to subset the data by selecting the first 5 rows.
dat %>% slice(1:5)
## # A tibble: 5 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Al Ho… BOS   C            82    245    30          9 Univer… 2.65e7    68
## 2 Amir … BOS   PF           81    240    29         11 <NA>    1.20e7    80
## 3 Avery… BOS   SG           74    180    26          6 Univer… 8.27e6    55
## 4 Demet… BOS   PG           73    201    22          0 Univer… 1.45e6     5
## 5 Geral… BOS   SF           79    205    31          9 <NA>    1.41e6    47
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# use slice() to subset the data by selecting rows 10, 15, 20, ..., 50.
# seq() spells out the start, end and step of the row positions
dat %>% slice(seq(from = 10, to = 50, by = 5))
## # A tibble: 9 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Jonas… BOS   PF           82    231    29          6 <NA>    5.00e6    78
## 2 Tyler… BOS   C            84    253    27          4 Univer… 8.00e6    51
## 3 Derri… CLE   PF           80    240    25          5 Univer… 4.02e5    25
## 4 Jorda… CLE   SG           78    185    25          1 Univer… 8.75e5    37
## 5 Larry… CLE   C            83    235    28          5 Virgin… 2.08e5     5
## 6 Cory … TOR   PG           75    193    25          5 Univer… 7.33e6    80
## 7 Jakob… TOR   C            84    248    21          0 Univer… 2.70e6    54
## 8 P.J. … TOR   SF           78    245    31          5 Univer… 5.30e6    24
## 9 Bradl… WAS   SG           77    207    23          4 Univer… 2.21e7    77
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# use slice() to subset the data by selecting the last 5 rows.
# n() is the number of rows, so (n() - 4):n() names the last five row
# positions directly. The parentheses matter: `:` binds tighter than `-`,
# so n() - 4:n() would compute n() minus the whole sequence 4:n().
slice(dat, (n() - 4):n())
## # A tibble: 5 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Marqu… PHO   PF           82    233    19          0 Univer… 2.94e6    82
## 2 Ronni… PHO   PG           74    190    33         11 Nichol… 4.65e5    14
## 3 T.J. … PHO   SF           80    230    23          2 North … 2.13e6    66
## 4 Tyler… PHO   PG           70    150    21          0 Univer… 9.18e5    61
## 5 Tyson… PHO   C            85    240    34         15 <NA>    1.24e7    47
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# use filter() to subset those players with height less than 70 inches tall.
dat %>% filter(height < 70)
## # A tibble: 2 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Isaia… BOS   PG           69    185    27          5 Univer… 6.59e6    76
## 2 Kay F… CLE   PG           69    176    21          0 Oaklan… 5.43e5    42
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# use filter() to subset rows of Golden State Warriors ('GSW').
dat %>% filter(team == "GSW")
## # A tibble: 16 x 15
##    player team  position height weight   age experience college salary
##    <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl>
##  1 Ander… GSW   C            82    273    34         12 <NA>    1.55e6
##  2 Andre… GSW   SF           78    215    33         12 Univer… 1.11e7
##  3 Damia… GSW   C            84    245    21          0 Vander… 1.17e6
##  4 David… GSW   C            81    250    36         13 Xavier… 1.55e6
##  5 Draym… GSW   PF           79    230    26          4 Michig… 1.53e7
##  6 Ian C… GSW   SG           75    175    25          3 Belmon… 1.02e6
##  7 James… GSW   PF           81    230    24          2 Univer… 9.80e5
##  8 JaVal… GSW   C            84    270    29          8 Univer… 1.40e6
##  9 Kevin… GSW   PF           81    240    28          9 Univer… 2.65e7
## 10 Kevon… GSW   C            81    220    20          1 Univer… 1.18e6
## 11 Klay … GSW   SG           79    215    26          5 Washin… 1.67e7
## 12 Matt … GSW   SF           79    226    36         13 Univer… 3.83e5
## 13 Patri… GSW   SG           79    185    21          0 Univer… 5.43e5
## 14 Shaun… GSW   PG           79    192    31         11 <NA>    5.78e6
## 15 Steph… GSW   PG           75    190    28          7 Davids… 1.21e7
## 16 Zaza … GSW   C            83    270    32         13 <NA>    2.90e6
## # ... with 6 more variables: games <int>, minutes <int>, points <int>,
## #   points3 <int>, points2 <int>, points1 <int>
# use filter() to subset rows of GSW centers ('C').
# both conditions must hold for a row to be kept
dat %>% filter(team == "GSW" & position == "C")
## # A tibble: 6 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Ander… GSW   C            82    273    34         12 <NA>    1.55e6    14
## 2 Damia… GSW   C            84    245    21          0 Vander… 1.17e6    10
## 3 David… GSW   C            81    250    36         13 Xavier… 1.55e6    68
## 4 JaVal… GSW   C            84    270    29          8 Univer… 1.40e6    77
## 5 Kevon… GSW   C            81    220    20          1 Univer… 1.18e6    53
## 6 Zaza … GSW   C            83    270    32         13 <NA>    2.90e6    70
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# use filter() and then select() to subset rows of the Lakers ('LAL'),
# and then display their names.
dat %>%
  filter(team == "LAL") %>%
  select(player)
## # A tibble: 14 x 1
##    player           
##    <chr>            
##  1 Brandon Ingram   
##  2 Corey Brewer     
##  3 D'Angelo Russell 
##  4 David Nwaba      
##  5 Ivica Zubac      
##  6 Jordan Clarkson  
##  7 Julius Randle    
##  8 Luol Deng        
##  9 Metta World Peace
## 10 Nick Young       
## 11 Tarik Black      
## 12 Thomas Robinson  
## 13 Timofey Mozgov   
## 14 Tyler Ennis
# use filter() and then select() to display the name and salary of
# GSW point guards
dat %>%
  filter(team == "GSW", position == "PG") %>%
  select(player, salary)
## # A tibble: 2 x 2
##   player             salary
##   <chr>               <dbl>
## 1 Shaun Livingston  5782450
## 2 Stephen Curry    12112359
# select the name, age, and team of players with more than 10 years of
# experience, making 10 million dollars or less.
dat %>%
  filter(experience > 10, salary <= 1e7) %>%
  select(player, age, team)
## # A tibble: 36 x 3
##    player              age team 
##    <chr>             <int> <chr>
##  1 Andrew Bogut         32 CLE  
##  2 Dahntay Jones        36 CLE  
##  3 Deron Williams       32 CLE  
##  4 James Jones          36 CLE  
##  5 Kyle Korver          35 CLE  
##  6 Richard Jefferson    36 CLE  
##  7 Jose Calderon        35 ATL  
##  8 Kris Humphries       31 ATL  
##  9 Mike Dunleavy        36 ATL  
## 10 Jason Terry          39 MIL  
## # ... with 26 more rows
# select the name, team, height, and weight of rookie players who are
# 20 years old, displaying only the first five occurrences (i.e. rows)
dat %>%
  filter(experience == 0, age == 20) %>%
  select(player, team, height, weight) %>%
  slice(1:5)
## # A tibble: 5 x 4
##   player            team  height weight
##   <chr>             <chr>  <int>  <int>
## 1 Jaylen Brown      BOS       79    225
## 2 Henry Ellenson    DET       83    245
## 3 Stephen Zimmerman ORL       84    240
## 4 Dejounte Murray   SAS       77    170
## 5 Chinanu Onuaku    HOU       82    245

Adding New Variables and Reordering Rows

# build a small Warriors data frame in one pipeline:
# team rows -> three columns -> five hand-picked row positions
gsw <- dat %>%
  filter(team == "GSW") %>%
  select(player, height, weight) %>%
  slice(c(4, 8, 10, 14, 15))
gsw
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 David West           81    250
## 2 JaVale McGee         84    270
## 3 Kevon Looney         81    220
## 4 Shaun Livingston     79    192
## 5 Stephen Curry        75    190
# (temporarily) add a column with the ratio height / weight;
# the result is only printed, gsw itself is not modified
gsw %>% mutate(height / weight)
## # A tibble: 5 x 4
##   player           height weight `height/weight`
##   <chr>             <int>  <int>           <dbl>
## 1 David West           81    250           0.324
## 2 JaVale McGee         84    270           0.311
## 3 Kevon Looney         81    220           0.368
## 4 Shaun Livingston     79    192           0.411
## 5 Stephen Curry        75    190           0.395
# the new column can also be given an explicit name
gsw %>% mutate(ht_wt = height / weight)
## # A tibble: 5 x 4
##   player           height weight ht_wt
##   <chr>             <int>  <int> <dbl>
## 1 David West           81    250 0.324
## 2 JaVale McGee         84    270 0.311
## 3 Kevon Looney         81    220 0.368
## 4 Shaun Livingston     79    192 0.411
## 5 Stephen Curry        75    190 0.395
# the result must be assigned for the new columns to be kept
gsw2 <- gsw %>%
  mutate(
    ht_m = height * 0.0254,
    wt_kg = weight * 0.4536
  )
gsw2
## # A tibble: 5 x 5
##   player           height weight  ht_m wt_kg
##   <chr>             <int>  <int> <dbl> <dbl>
## 1 David West           81    250  2.06 113. 
## 2 JaVale McGee         84    270  2.13 122. 
## 3 Kevon Looney         81    220  2.06  99.8
## 4 Shaun Livingston     79    192  2.01  87.1
## 5 Stephen Curry        75    190  1.90  86.2
# order rows by height; arrange() sorts increasingly by default
gsw %>% arrange(height)
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 Stephen Curry        75    190
## 2 Shaun Livingston     79    192
## 3 David West           81    250
## 4 Kevon Looney         81    220
## 5 JaVale McGee         84    270
# order rows by height, tallest first (desc() reverses the sort)
gsw %>% arrange(desc(height))
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 JaVale McGee         84    270
## 2 David West           81    250
## 3 Kevon Looney         81    220
## 4 Shaun Livingston     79    192
## 5 Stephen Curry        75    190
# order rows by height, breaking ties by weight
gsw %>% arrange(height, weight)
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 Stephen Curry        75    190
## 2 Shaun Livingston     79    192
## 3 Kevon Looney         81    220
## 4 David West           81    250
## 5 JaVale McGee         84    270

Adding New Variables and Reordering Rows: Your Turn

# using the data frame gsw, add a new variable with the product of
# height and weight.
gsw %>% mutate(h_w = height * weight)
## # A tibble: 5 x 4
##   player           height weight   h_w
##   <chr>             <int>  <int> <int>
## 1 David West           81    250 20250
## 2 JaVale McGee         84    270 22680
## 3 Kevon Looney         81    220 17820
## 4 Shaun Livingston     79    192 15168
## 5 Stephen Curry        75    190 14250
# create a new data frame gsw3 by adding columns log_height and log_weight
# with the (natural) log transformations of height and weight.
gsw3 <- gsw %>%
  mutate(
    log_height = log(height),
    log_weight = log(weight)
  )
gsw3
## # A tibble: 5 x 5
##   player           height weight log_height log_weight
##   <chr>             <int>  <int>      <dbl>      <dbl>
## 1 David West           81    250       4.39       5.52
## 2 JaVale McGee         84    270       4.43       5.60
## 3 Kevon Looney         81    220       4.39       5.39
## 4 Shaun Livingston     79    192       4.37       5.26
## 5 Stephen Curry        75    190       4.32       5.25
# use the original data frame to filter() and arrange() those players with
# height less than 71 inches tall, in increasing order of height.
dat %>%
  filter(height < 71) %>%
  arrange(height)
## # A tibble: 4 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Isaia… BOS   PG           69    185    27          5 Univer… 6.59e6    76
## 2 Kay F… CLE   PG           69    176    21          0 Oaklan… 5.43e5    42
## 3 Pierr… DAL   PG           70    180    25          0 Baylor… 1.05e5     8
## 4 Tyler… PHO   PG           70    150    21          0 Univer… 9.18e5    61
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>
# display the name, team, and salary of the top-5 highest paid players
dat %>%
  arrange(desc(salary)) %>%
  select(player, team, salary) %>%
  slice(1:5)
## # A tibble: 5 x 3
##   player        team    salary
##   <chr>         <chr>    <dbl>
## 1 LeBron James  CLE   30963450
## 2 Al Horford    BOS   26540100
## 3 DeMar DeRozan TOR   26540100
## 4 Kevin Durant  GSW   26540100
## 5 James Harden  HOU   26540100
# display the name, team, and points3 of the top 10 three-point players
dat %>%
  arrange(desc(points3)) %>%
  select(player, team, points3) %>%
  slice(1:10)
## # A tibble: 10 x 3
##    player         team  points3
##    <chr>          <chr>   <int>
##  1 Stephen Curry  GSW       324
##  2 Klay Thompson  GSW       268
##  3 James Harden   HOU       262
##  4 Eric Gordon    HOU       246
##  5 Isaiah Thomas  BOS       245
##  6 Kemba Walker   CHO       240
##  7 Bradley Beal   WAS       223
##  8 Damian Lillard POR       214
##  9 Ryan Anderson  HOU       204
## 10 J.J. Redick    LAC       201
# create a data frame gsw_mpg of GSW players that contains variables for
# player name, experience, and min_per_game (minutes per game), sorted by
# min_per_game in descending order
gsw_mpg <- dat %>%
  mutate(min_per_game = minutes / games) %>%
  filter(team == "GSW") %>%
  arrange(desc(min_per_game)) %>%
  select(player, experience, min_per_game)
gsw_mpg
## # A tibble: 16 x 3
##    player               experience min_per_game
##    <chr>                     <int>        <dbl>
##  1 Klay Thompson                 5        34.0 
##  2 Stephen Curry                 7        33.4 
##  3 Kevin Durant                  9        33.4 
##  4 Draymond Green                4        32.5 
##  5 Andre Iguodala               12        26.3 
##  6 Matt Barnes                  13        20.5 
##  7 Zaza Pachulia                13        18.1 
##  8 Shaun Livingston             11        17.7 
##  9 Patrick McCaw                 0        15.1 
## 10 Ian Clark                     3        14.8 
## 11 David West                   13        12.6 
## 12 JaVale McGee                  8         9.60
## 13 James Michael McAdoo          2         8.79
## 14 Damian Jones                  0         8.5 
## 15 Kevon Looney                  1         8.43
## 16 Anderson Varejao             12         6.57

Summarize and Group Operations

# average salary across all NBA players (one-row summary)
dat %>% summarise(avg_salary = mean(salary))
## # A tibble: 1 x 1
##   avg_salary
##        <dbl>
## 1   5804697.
# several salary statistics at once: each named argument becomes one
# column of the one-row result
dat %>%
  summarise(
    min = min(salary),
    median = median(salary),
    avg = mean(salary),
    max = max(salary)
  )
## # A tibble: 1 x 4
##     min  median      avg      max
##   <dbl>   <dbl>    <dbl>    <dbl>
## 1  5145 3000000 5804697. 30963450
# average salary per team: group first, then summarise each group
dat %>%
  group_by(team) %>%
  summarise(avg_salary = mean(salary))
## # A tibble: 30 x 2
##    team  avg_salary
##    <chr>      <dbl>
##  1 ATL     5494447.
##  2 BOS     6127673.
##  3 BRK     4011351.
##  4 CHI     5781368.
##  5 CHO     5531548.
##  6 CLE     7069699.
##  7 DAL     5157128.
##  8 DEN     4648719.
##  9 DET     6871632.
## 10 GSW     6265160.
## # ... with 20 more rows
# average salary per position: group first, then summarise each group
dat %>%
  group_by(position) %>%
  summarise(avg_salary = mean(salary))
## # A tibble: 5 x 2
##   position avg_salary
##   <chr>         <dbl>
## 1 C          6529906.
## 2 PF         5801127.
## 3 PG         5601217.
## 4 SF         6042455.
## 5 SG         5114178.
# average height and weight by position, displayed in descending order
# of average height:
dat %>%
  group_by(position) %>%
  summarise(
    avg_height = mean(height),
    avg_weight = mean(weight)
  ) %>%
  arrange(desc(avg_height))
## # A tibble: 5 x 3
##   position avg_height avg_weight
##   <chr>         <dbl>      <dbl>
## 1 C              83.2       251.
## 2 PF             81.4       235.
## 3 SF             79.5       220.
## 4 SG             77.0       204.
## 5 PG             74.3       189.

Summarize and Group Operations: Your Turn

# use summarise() to get the largest height value.
dat %>% summarise(max_height = max(height))
## # A tibble: 1 x 1
##   max_height
##        <dbl>
## 1         87
# use summarise() to get the standard deviation of points3.
dat %>% summarise(sd_of_3pointers = sd(points3))
## # A tibble: 1 x 1
##   sd_of_3pointers
##             <dbl>
## 1            55.1
# use summarise() and group_by() to display the median of points3, by team.
dat %>%
  group_by(team) %>%
  summarise(median_of_3pointers = median(points3))
## # A tibble: 30 x 2
##    team  median_of_3pointers
##    <chr>               <dbl>
##  1 ATL                  32  
##  2 BOS                  46  
##  3 BRK                  36  
##  4 CHI                  28.5
##  5 CHO                  13  
##  6 CLE                  26.5
##  7 DAL                  18  
##  8 DEN                  46  
##  9 DET                  28  
## 10 GSW                  10.5
## # ... with 20 more rows
# display the average points3 by team, in ascending order, for the
# bottom-5 teams (the five lowest team averages)
dat %>%
  group_by(team) %>%
  summarise(average_3pointers = mean(points3)) %>%
  arrange(average_3pointers) %>%
  slice(1:5)
## # A tibble: 5 x 2
##   team  average_3pointers
##   <chr>             <dbl>
## 1 NOP                32.4
## 2 PHO                33.5
## 3 ORL                34.3
## 4 SAC                35.1
## 5 CHI                35.3
# obtain the mean and standard deviation of age for Power Forwards with
# between 5 and 10 years of experience (both bounds included).
#
# The range test uses `&`, the element-wise AND, so it is evaluated for every
# row. The scalar `&&` inspects a single value only (and is an error for
# vectors longer than one since R 4.3), so it cannot express a per-row filter.
# NOTE: any output recorded for the earlier `&&` version of this call is
# stale and has to be regenerated by re-running the chunk.
summarise(
  filter(dat, position == "PF", experience >= 5 & experience <= 10),
  mean = mean(age),
  sd = sd(age)
)
## # A tibble: 1 x 2
##    mean    sd
##   <dbl> <dbl>
## 1  26.1  4.11

First contact with ggplot()

# scatterplot (option 1): aesthetics mapped inside the geom
ggplot(dat) +
  geom_point(mapping = aes(points, salary))

# scatterplot (option 2): aesthetics mapped once for the whole plot
ggplot(dat, aes(points, salary)) +
  geom_point()

# scatterplot with point color mapped to position
ggplot(dat, aes(points, salary)) +
  geom_point(aes(colour = position))

# scatterplot with color mapped to position and size mapped to points3
ggplot(dat, aes(points, salary)) +
  geom_point(aes(colour = position, size = points3))

# same sized and colored scatterplot, with semi-transparent points
ggplot(dat, aes(points, salary)) +
  geom_point(aes(colour = position, size = points3), alpha = 0.7)

ggplot(): Your Turn

# Use the data frame gsw to make a scatterplot of height and weight.
ggplot(gsw, aes(height, weight)) +
  geom_point()

# Another scatterplot of height and weight, using geom_text() to display
# the names of the players.
ggplot(gsw, aes(height, weight)) +
  geom_point() +
  geom_text(label = gsw$player)

# Get a scatter plot of height and weight, for ALL the warriors, displaying
# their names with geom_label().
# The label is mapped through aes(), so it is read from the same filtered
# data the plot is built on and the team filter is written only once.
ggplot(data = filter(dat, team == "GSW"), aes(x = height, y = weight)) +
  geom_point() +
  geom_label(aes(label = player))

# Density plot of salary (for all NBA players).
ggplot(dat, aes(salary)) +
  geom_density()

# Histogram of points2 with a binwidth of 50 (for all NBA players).
ggplot(dat, aes(points2)) +
  geom_histogram(binwidth = 50)

# Barchart of the position frequencies (for all NBA players).
ggplot(dat, aes(position)) +
  geom_bar()

# Scatterplot of experience and salary of all Centers, with geom_smooth()
# adding a regression line.
ggplot(filter(dat, position == "C"), aes(experience, salary)) +
  geom_point() +
  geom_smooth(method = "lm")

# The same scatterplot of experience and salary of all Centers, but with
# geom_smooth() adding a loess line (i.e. smooth line).
ggplot(filter(dat, position == "C"), aes(experience, salary)) +
  geom_point() +
  geom_smooth(method = "loess")

Faceting

# one wrapped panel per position
ggplot(dat, aes(points, salary)) +
  geom_point() +
  facet_wrap(~position)

# facet_grid() with position on the column side of the formula,
# plus a loess smoother
ggplot(dat, aes(points, salary)) +
  geom_point(aes(colour = position), alpha = 0.7) +
  facet_grid(. ~ position) +
  geom_smooth(method = "loess")

# facet_grid() with position on the row side of the formula,
# plus a loess smoother
ggplot(dat, aes(points, salary)) +
  geom_point(aes(colour = position), alpha = 0.7) +
  facet_grid(position ~ .) +
  geom_smooth(method = "loess")

Faceting: Your Turn

# Scatterplots of experience and salary, faceted by position
ggplot(dat, aes(experience, salary)) +
  geom_point() +
  facet_wrap(~position)

# Scatterplots of experience and salary, faceted by team
ggplot(dat, aes(experience, salary)) +
  geom_point() +
  facet_wrap(~team)

# Density plots of age, faceted by team
ggplot(dat, aes(age)) +
  geom_density() +
  facet_wrap(~team)

# Scatterplots of height and weight, faceted by position
ggplot(dat, aes(height, weight)) +
  geom_point() +
  facet_wrap(~position)

# Height and weight drawn as a 2-dimensional density with geom_density2d(),
# faceted by position
ggplot(dat, aes(height, weight)) +
  geom_density2d() +
  facet_wrap(~position)

# Scatterplot of experience and salary for the Warriors, with a theme_bw()
# layer for a simpler background
ggplot(filter(dat, team == "GSW"), aes(experience, salary)) +
  geom_point() +
  theme_bw()

# Repeat one of the previous plots, adding a layer with another theme:
# theme_minimal(), theme_dark(), theme_classic()
ggplot(dat, aes(height, weight)) +
  geom_point() +
  facet_wrap(~position) +
  theme_minimal()

ggplot(dat, aes(height, weight)) +
  geom_point() +
  facet_wrap(~position) +
  theme_dark()

ggplot(dat, aes(height, weight)) +
  geom_point() +
  facet_wrap(~position) +
  theme_classic()

More Shell Commands

# Move inside the images/ directory of the lab.
cd ~/Desktop/hw-stat133/lab05/images

# List the contents of this directory.
ls

# Now list the contents of the directory in long format.
# (the flag is the lowercase letter l; the digit 1 only prints one name per line)
ls -l

# How would you list the contents in long format, by time?
ls -l -t

# How would you list the contents in long format, in reverse (alphabetical) order?
ls -l -r

# Without changing your current directory (still images/), create a directory
# copies at the parent level (i.e. lab05/).
mkdir ../copies

# Copy one of the PNG files to the copies folder.
cp unnamed-chunk-9-2.png ../copies

# Use the wildcard * to copy all the .png files into the directory copies.
cp *.png ../copies

# Change to the directory copies.
cd ../copies

# Use the command mv to rename some of your PNG files.
mv unnamed-chunk-8-1.png image1.png
mv unnamed-chunk-8-2.png image2.png
mv unnamed-chunk-8-3.png image3.png
mv unnamed-chunk-8-4.png image4.png

# Change to the report/ directory.
cd ~/Desktop/hw-stat133/lab05/report

# From within report/, find out how to rename the directory copies as copy-files.
mv ~/Desktop/hw-stat133/lab05/copies ~/Desktop/hw-stat133/lab05/copy-files

# From within report/, delete one or two PNG files in copy-files.
rm ~/Desktop/hw-stat133/lab05/copy-files/image1.png

# From within report/, find out how to delete the directory copy-files.
rm -r ~/Desktop/hw-stat133/lab05/copy-files